
version 15

clear
set more off

cd "TO COMPLETE"

********************************************************************************
***2017 data***

import excel "Raw data\Pres 2017 participation.xlsx", sheet("Pres 2017 participation") firstrow

*change structure dataset
reshape long turnout1_ turnout2_, i(code_dep nom_dep) j(time)

rename turnout1_ turnout1
rename turnout2_ turnout2

reshape long turnout, i(code_dep nom_dep time) j(tour)

*gen year variable
gen year=2017

*order variables
order code_dep nom_dep year tour time turnout

*drop DOM/TOM
drop if regexm(code_dep, "Z")==1


*save .dta
save "Raw data\Pres 2017 participation.dta", replace

***2022 data***

import excel "Raw data\Pres 2022 participation.xlsx", ///
 sheet("Feuil1") cellrange(A4:H112) firstrow clear 

rename A code_dep
rename DEPARTEMENT nom_dep
rename h turnout2022_1_12
rename D turnout2022_1_17
rename Déf turnout2022_1_20
rename F turnout2022_2_12
rename G turnout2022_2_17
rename H turnout2022_2_20

*change structure dataset
reshape long turnout2022_1_ turnout2022_2_ , i(code_dep nom_dep) j(time)

rename turnout2022_1_ turnout2022_1
rename turnout2022_2_ turnout2022_2

reshape long turnout2022_, i(code_dep nom_dep time) j(tour)

rename turnout2022_ turnout2022

reshape long turnout, i(code_dep nom_dep time tour) j(year)

*order variables
order code_dep nom_dep year tour time turnout

*drop DOM/TOM
drop if regexm(code_dep, "Z")==1

drop if turnout ==.

save "Raw data\Pres 2022 participation.dta", replace
************** Voten  outcomes *********************************************
clear
import excel "Raw data\votes by department.xlsx", sheet("votes by department") firstrow

save "Raw data\votes by department.dta", replace

***** weather data ***********
clear all

import excel "Raw data\temperatures.xlsx", sheet("Tabelle1") firstrow

rename Code code_dep

save "Raw data\temperature.dta", replace

********************************************************************************
***2002 to 2012 data

clear 
import excel "Raw data\Pres 2002 2007 2012 participation.xlsx", sheet("Pres 2002 2007 2012 participati") firstrow

*change structure dataset
reshape long turnout2002_1_ turnout2002_2_ turnout2007_1_ turnout2007_2_ turnout2012_1_ turnout2012_2_, i(code_dep nom_dep) j(time)

foreach var in turnout2002_1 turnout2002_2 turnout2007_1 turnout2007_2 turnout2012_1 turnout2012_2 {
rename `var'_ `var'
}

reshape long turnout2002_ turnout2007_ turnout2012_, i(code_dep nom_dep time) j(tour)

rename turnout2002_ turnout2002
rename turnout2007_ turnout2007
rename turnout2012_ turnout2012

reshape long turnout, i(code_dep nom_dep time tour) j(year)

*order variables
order code_dep nom_dep year tour time turnout

*drop DOM/TOM
drop if regexm(code_dep, "Z")==1


*save .dta
save "Raw data\Pres 2002 2007 2012 participation.dta", replace

append using "Raw data\Pres 2017 participation.dta"

append using "Raw data\Pres 2022 participation.dta"
sort code_dep nom_dep year tour time


************************** Merge with weather data *****************************
merge m:1 code_dep using "Raw data\temperature.dta"
** overseas departments
drop if _merge ==2 
drop _merge

replace code_dep = "100" if code_dep =="2A"
replace code_dep = "101" if code_dep =="2B"
destring code_dep, replace

************************** Merge with voting data *****************************
merge m:1 code_dep using "Raw data\votes by department.dta"

************************* Geography ********************************************
gen veryclosebe=0
replace veryclosebe = 1 if code_dep == 59 | code_dep == 2 | code_dep == 8 | code_dep == 62 
**8. Nord 59 -Aisne 2 -Ardennes 8 -Pas de Calais 62
gen closebe = 0
replace closebe = 1 if code_dep == 59 | code_dep == 2 | code_dep == 8 | code_dep ==  55 | code_dep == 54 | code_dep == 62 | code_dep == 80 
**9. Nord 59 -Aisne 2 -Ardennes 8 -Meuse 55 -Meurthe et Moselle  54 -Pas de Calais 62 -Somme 80 
gen veryclosech=0
replace veryclosech = 1 if code_dep==74 | code_dep == 1 | code_dep == 39 | code_dep == 25 
**10. Haute Savoie 74-Ain 1-Jura 39-Doubs 25
gen closech = 0
replace closech = 1 if code_dep == 74 | code_dep == 1 | code_dep == 39 | code_dep == 25  | code_dep == 90 | code_dep == 68 | code_dep==73
**11. Haute Savoie 74-Ain 1-Jura 39-Doubs 25-Belfort 90-Haut Rhin 68 -Savoie 73

**************************** Weather variables ***********************************

gen rainfall = rainfall_turn1 if year ==2017 & tour ==1
replace rainfall = rainfall_turn2 if year ==2017 & tour ==2
gen tempmax = tempmax_turn1 if year ==2017 & tour ==1
replace tempmax = tempmax_turn2 if year ==2017 & tour ==2
gen tempmin = tempmin_turn1 if year ==2017 & tour ==1
replace tempmin = tempmin_turn2 if year ==2017 & tour ==2

gen no_rainfall = (rainfall==0)
egen never_rain = min(no_rainfall) if year==2017, by(code_dep)
egen tempmax_std = std(tempmax)

gen raining = (rainfall >0) if year==2017


**************************** Voting preferences ***********************************

gen mlp_first = (MLP_1 > EM_1 & MLP_1 >FF_1 & MLP_1 > JLM_1 & MLP_1 > BH_1)
gen em_first = (EM_1 > MLP_1 & EM_1 >FF_1 & EM_1 > JLM_1 & EM_1 > BH_1)

** top left departments
gen left_votes = JLM_1 + BH_1
xtile pc = left_votes, nq(4)
gen left_deps = (pc>2)

** top fillon departmens
xtile pc_ff = FF_1 , nq(4)
gen right_deps = (pc_ff>2)


************************** Lables **********************************************
label var turnout "Turnout"
label var time "Time"
label var tour "Round"
label var year "Year"
label var rainfall "Rainfall"
label var tempmax "Maximum Temperature"
label var tempmin "Minimum Temperature"
label var never_rain "No rain"
label define never_rain 0 "rain" 1 "Never rained"
label val never_rain never_rain
label var tempmax_std "Max. temperture (std)"


label var raining "Rain"
label define raining 1 "Rain" 
label values raining raining

label define time 12 "12h" 17 "17h" 20 "Closure"
label values time time

label define tour 1 "1st Round" 2 "2nd Round"
label values tour tour

label define year 2002 "2002" 2007 "2007" 2012 "2012" 2017 "2017"
label values year year

label var mlp_first "Le Pen first"
label define mlp_first 1 "Le Pen first" 
label values mlp_first mlp_first

label var em_first "Macron first"
label define em_first 1 "Macron first"
label values em_first em_first

label var left_deps "Left Top 50\%"
label define left_deps 1 "Left Top 50\%"
label values left_deps left_deps

label var right_deps "Fillon Top 50\%"
label define right_deps 1 "Fillon Top 50\%"
label values right_deps right_deps

label var veryclosebe "Very close BE"
label define veryclosebe 1 "Very close BE" 
label val  veryclosebe veryclosebe

label var closebe "Close BE"
label define closebe 1 "Close BE"
label val  closebe closebe

label var veryclosech "Very close CH"
label define veryclosech  1 "Very close CH"
label val veryclosech  veryclosech 

label var closech "Close CH"
label define closech 1 "Close CH"
label val closech closech


************************** Weights *********************************************

gen weight = .
egen sum_turn1= sum(inscrit_1) if year==2017 & time ==20 & tour==1 
egen sum_turn2= sum(inscrit_2) if year==2017 & time ==20 & tour==2 
egen sum_turn2022_1= sum(inscrit2022_1) if year==2022 & time ==20 & tour==1 
egen sum_turn2022_2= sum(inscrit2022_2) if year==2022 & time ==20 & tour==2 
egen sum_turn2012_1= sum(inscrit2012_1) if year==2012 & time ==20 & tour==1 
egen sum_turn2012_2= sum(inscrit2012_2) if year==2012 & time ==20 & tour==2 
egen sum_turn2007_1= sum(inscrit2007_1) if year==2007 & time ==20 & tour==1 
egen sum_turn2007_2= sum(inscrit2007_2) if year==2007 & time ==20 & tour==2 


gen weight_1 = inscrit_1 / sum_turn1  if year==2017 & time ==20 & tour==1 
replace weight_1 = inscrit_2 / sum_turn2  if year==2017 & time ==20 & tour==2
replace weight_1 = inscrit2012_1 / sum_turn2012_1  if year==2012 & time ==20 & tour==1
replace weight_1 = inscrit2012_2 / sum_turn2012_2  if year==2012 & time ==20 & tour==2
replace weight_1 = inscrit2007_1 / sum_turn2007_1  if year==2007 & time ==20 & tour==1
replace weight_1 = inscrit2007_2 / sum_turn2007_2  if year==2007 & time ==20 & tour==2
replace weight_1 = inscrit2022_1 / sum_turn2022_1  if year==2022 & time ==20 & tour==1
replace weight_1 = inscrit2022_2 / sum_turn2022_2  if year==2022 & time ==20 & tour==2


egen weight_both = max(weight_1), by(year tour code_dep)

drop _merge
save "Cleaned data\turnout.dta", replace

